{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 1.加载向量模型",
   "id": "4c45c45c8e47a8ee"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-07T14:11:29.508363Z",
     "start_time": "2025-04-07T14:11:29.382015Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Dependencies used by this notebook:\n",
    "# pip install langchain\n",
    "# pip install langchain-community\n",
    "# pip install langchain_ollama\n",
    "# pip install sentence-transformers\n",
    "import os\n",
    "\n",
    "from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
    "from langchain.chains.llm import LLMChain\n",
    "from langchain_community.document_transformers import LongContextReorder\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_ollama import OllamaEmbeddings\n",
    "\n",
    "# The Ollama endpoint can be overridden with the OLLAMA_BASE_URL environment\n",
    "# variable; when it is unset the original server address is used.\n",
    "OLLAMA_BASE_URL = os.environ.get(\"OLLAMA_BASE_URL\", \"http://10.2.4.31:11434\")\n",
    "\n",
    "# Embedding model served by Ollama; it is reused by the vector store below.\n",
    "embedding_model = OllamaEmbeddings(\n",
    "    # Alternative embedding model: \"qllama/bce-embedding-base_v1\"\n",
    "    model=\"quentinz/bge-large-zh-v1.5:latest\",\n",
    "    base_url=OLLAMA_BASE_URL,\n",
    ")"
   ],
   "id": "299798499f2f3be2",
   "outputs": [],
   "execution_count": 38
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 2.长上下文精度问题",
   "id": "af693b7d03e8a5cb"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-07T14:26:27.497130Z",
     "start_time": "2025-04-07T14:26:26.959089Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import uuid\n",
    "\n",
    "import ollama\n",
    "import chromadb\n",
    "from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
    "from langchain.chains.llm import LLMChain\n",
    "from langchain_community.document_transformers import LongContextReorder\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Small sample corpus: a few texts mention the Celtics, the rest are unrelated.\n",
    "texts = [\n",
    "    \"篮球是一项伟大的运动\",\n",
    "    \"带我飞往月球是我最喜欢的歌曲之一\",\n",
    "    \"凯尔特人队，是我最喜欢的球队\",\n",
    "    \"这是一篇关于凯尔特人的文章\",\n",
    "    \"我非常喜欢去看电影。\",\n",
    "    \"波士顿凯尔特人队以20分的优势赢了比赛\",\n",
    "    \"这只是一段随机的文字。\",\n",
    "    \"《艾尔登之环》是过去15年最好的游戏之一\",\n",
    "    \"L.科内特是凯尔特人队最好的球员之一\",\n",
    "    \"拉里.博德是一位标志性的NBA球员\"\n",
    "]\n",
    "\n",
    "# Index the texts into a fresh, uniquely named collection on every execution.\n",
    "# Without an explicit collection name, re-running this cell kept adding the\n",
    "# same texts to one collection (the saved output listed every text four\n",
    "# times), and changing the embedding model failed with\n",
    "# \"Embedding dimension 1024 does not match collection dimensionality 768\".\n",
    "collection_name = f\"long_context_demo_{uuid.uuid4().hex}\"\n",
    "vectorstore = Chroma.from_texts(\n",
    "    texts,\n",
    "    embedding_model,\n",
    "    collection_name=collection_name,\n",
    ")\n",
    "\n",
    "# Retrieve every indexed text, ranked by similarity to the query, so the\n",
    "# full ordering is available for the reordering step that follows.\n",
    "retriever = vectorstore.as_retriever(\n",
    "    search_kwargs = {'k': len(texts)}\n",
    ")\n",
    "query = \"关于凯尔特人队的事情？\"\n",
    "docs = retriever.invoke(query)\n",
    "docs"
   ],
   "id": "3913fbe328720e0b",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动')]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 63
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-07T14:26:33.459409Z",
     "start_time": "2025-04-07T14:26:33.451963Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Reorder the retrieved documents:\n",
    "# - content less relevant to the question is placed in the middle\n",
    "# - content more relevant to the question is placed at the start and end\n",
    "reordering = LongContextReorder()\n",
    "re_docs = reordering.transform_documents(docs)\n",
    "re_docs"
   ],
   "id": "b4bbb098d251454d",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='篮球是一项伟大的运动'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='波士顿凯尔特人队以20分的优势赢了比赛'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='L.科内特是凯尔特人队最好的球员之一'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='凯尔特人队，是我最喜欢的球队'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章'),\n",
       " Document(metadata={}, page_content='这是一篇关于凯尔特人的文章')]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 64
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-07T14:27:30.865307Z",
     "start_time": "2025-04-07T14:27:28.747629Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "\n",
    "from langchain_core.messages import SystemMessage, HumanMessage\n",
    "from langchain_community.chat_models import ChatOpenAI\n",
    "from langchain.prompts import PromptTemplate\n",
    "\n",
    "# Chat model reached through Ollama's OpenAI-compatible \"/v1/\" endpoint.\n",
    "# The server address can be overridden with the OLLAMA_BASE_URL environment\n",
    "# variable; when it is unset the original address is used.\n",
    "ollama_base_url = os.environ.get(\"OLLAMA_BASE_URL\", \"http://10.2.4.31:11434\").rstrip(\"/\")\n",
    "model = ChatOpenAI(\n",
    "    model=\"qwen2.5\",\n",
    "    openai_api_key=\"ollama\",\n",
    "    openai_api_base=f\"{ollama_base_url}/v1/\",\n",
    ")\n",
    "\n",
    "# Renders each document as its page_content only.\n",
    "document_prompt = PromptTemplate(\n",
    "    input_variables = [\"page_content\"], template=\"{page_content}\"\n",
    ")\n",
    "\n",
    "# Final prompt: the rendered documents fill {context}, the question fills {query}.\n",
    "stuff_prompt_override = \"\"\"从文本中提取:\n",
    "-----\n",
    "{context}\n",
    "-----\n",
    "请回答以下问题:\n",
    "{query}\n",
    "\"\"\"\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=stuff_prompt_override,\n",
    "    input_variables=[\"context\", \"query\"],\n",
    ")\n",
    "\n",
    "llm_chain = LLMChain(\n",
    "    llm = model,\n",
    "    prompt = prompt\n",
    ")\n",
    "\n",
    "worker_chain = StuffDocumentsChain(\n",
    "    llm_chain = llm_chain,\n",
    "    document_prompt=document_prompt, # takes page_content from each document to build the context argument\n",
    "    document_variable_name=\"context\"\n",
    ")\n",
    "\n",
    "# invoke() replaces the deprecated run(); the answer text is stored under\n",
    "# the chain's \"output_text\" key.\n",
    "worker_chain.invoke(\n",
    "    {\n",
    "        \"input_documents\": re_docs,\n",
    "        \"query\": \"凯尔特人队是哪里的球队？\",\n",
    "    }\n",
    ")[\"output_text\"]"
   ],
   "id": "a4e8f6c5f72f7a94",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'根据提供的文本信息，波士顿凯尔特人队是一个球队。可以推断出凯尔特人队是来自美国波士顿的球队。'"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 66
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "",
   "id": "475a839b0e5042d6",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorch",
   "language": "python",
   "name": "pytorch"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
